/*++ Module: Compress Programming Assignment #2 CSE 373 2000 Winter Quarter University of Washington Description: This module demonstrates a lossless compression strategy loosely based on the LZRW and LZNT1 format. Author: Gary Kimura [GaryKi@cs.washington.edu] 12-Feb-2000 Revision History: --*/ #include #define TRUE 1 #define FALSE 0 // // Global variables // // // Declare the variables used to hold and describe the state of the input and // output buffers // unsigned char InputBuffer[50000]; int InputBufferLength; unsigned char OutputBuffer[50000]; int OutputBufferLength; // // The index within the output buffer of the current control byte we're building // int CurrentControlByteIndex; // // The number of tokens we're currently put in the output buffer // int OutputTokenCount; // // Declare the variables used to store the hash table. The hash table stores // the input buffer index. // int HashTable[256]; // // Local procedure prototypes for doing I/O // int ReadBuffer ( unsigned char *Buffer, int Limit ); void WriteBuffer ( unsigned char *Buffer, int Length ); // // Local procedures prototypes for emitting tokens // void EmitDataToken ( unsigned char Data ); void EmitCopyToken ( unsigned char Offset, unsigned char Length ); // // Local procdures prototypes for finding a match // int FindMatch ( int CurrentInputBufferIndex, int *Offset ); int CompareStrings ( int InputBufferIndex1, int InputBufferIndex2 ); int LookupTableEntry ( int InputBufferIndex, char UpdateTableEntry ); void main( int argc, char *argv[] ) /*++ Routine Description: This routine starts up the demonstration compression program Arguments: argc - The number of command line arguments argv - The actual command line arguments Return Value: None. --*/ { int CurrentInputBufferIndex; int Length; int Offset; // // Fix up standard input and output to be binary files // if (argc >= 2) { if (stdin != freopen( argv[1], "rb", stdin )) { fprintf(stderr, "Redirection of stdin error\n"); return; } } if (argc >= 3) { if (stdout != freopen( argv[2], "wb", stdout )) { fprintf(stderr, "Redirection of stdout error\n"); return; } } // // To be clean we'll zero out the hash table before we do any real work // memset( HashTable, -1, sizeof(HashTable)); // // Read in input buffer // InputBufferLength = ReadBuffer( InputBuffer, 50000 ); // // Initialize the output control variables // OutputBufferLength = 0; CurrentControlByteIndex = 0; OutputTokenCount = 0; // // To do the compression we'll loop through the input buffer using // CurrentInputBufferIndex to control where we're at in the input // buffer and to decide when to terminate the loop // CurrentInputBufferIndex = 0; while (CurrentInputBufferIndex < InputBufferLength) { // // Lookup the length and offset of a previous match // Length = FindMatch( CurrentInputBufferIndex, &Offset ); // // Now independent if we found a match we'll update the // table with the current index // LookupTableEntry( CurrentInputBufferIndex, TRUE ); // // If the previous match is greater than 3 bytes, and within 255 // bytes of the current index then we can emit a copy token, but // only after we trim the length to at most 255 bytes // if ((Length >= 3) && (Offset <= 255)) { Length = (Length <= 255 ? Length : 255); // // Emit the copy token and increment the input buffer index // by the amount just copied // EmitCopyToken( (unsigned char)Offset, (unsigned char)Length ); CurrentInputBufferIndex += Length; } else { // // Emit a single data token and update the index by just 1 // EmitDataToken( InputBuffer[CurrentInputBufferIndex] ); CurrentInputBufferIndex += 1; } } // // Write out the output buffer // WriteBuffer( OutputBuffer, OutputBufferLength ); return; } int ReadBuffer ( unsigned char *Buffer, int Limit ) /*++ Routine Description: A simply routine to simply read in from stdin and fill the buffer up to the limit Arguments: Buffer - A pointer to the buffer we're to fill Limit - The maximum number of characters we can use in the buffer Return Value: int - The actual number of bytes read in --*/ { int c,i; // // Loop until we've exhausted the input or the limit // i = 0; while ((--Limit > 0) && ((c = getchar()) != EOF)) { Buffer[i++] = c; } // // And return the number of bytes we're read in // return i; } void WriteBuffer ( unsigned char *Buffer, int Length ) /*++ Routine Description: A simple routine to output a buffer to stdout Arguments: Buffer - Supplies a pointer to the buffer being output Length - Supplies the length, in bytes, of the output buffer Return Value: None. --*/ { int i; for (i = 0; i < Length; i += 1) { putchar(Buffer[i]); } return; } void EmitDataToken ( unsigned char Data ) /*++ Routine Description: This routine puts a data token into the compressed output buffer at the next token location and updates the control byte as necessary Arguments: Data - Supplies the data being emitted Return Value: None. --*/ { // // Decide if we need to update to another control byte. If we've // already output a modulo of 8 tokens we need to do an update // if ((OutputTokenCount % 8) == 0) { CurrentControlByteIndex = OutputBufferLength; OutputBufferLength += 1; } // // Clear the bit in the current control byte to indicate a data token // OutputBuffer[ CurrentControlByteIndex ] &= ~(1 << (OutputTokenCount % 8)); // // Output the data byte // OutputBuffer[ OutputBufferLength ] = Data; // // Update the global variables for the next time around // OutputTokenCount += 1; OutputBufferLength += 1; // // And return to our caller // return; } void EmitCopyToken ( unsigned char Offset, unsigned char Length ) /*++ Routine Description: This routine puts a copy token in the compressed output buffer and updates the control byte as necessary Arguments: Offset - Supplies the offset value to use in the copy token Length - Supplies the length to use in the copy token Return Value: none. --*/ { // // Decide if we need to update to another control byte. If we've // already output a modulo of 8 tokens we need to do an update // if ((OutputTokenCount % 8) == 0) { CurrentControlByteIndex = OutputBufferLength; OutputBufferLength += 1; } // // Set the bit in the current control byte to indicate a copy token // OutputBuffer[ CurrentControlByteIndex ] |= (1 << (OutputTokenCount % 8)); // // Output the offset and length bytes // OutputBuffer[ OutputBufferLength ] = Offset; OutputBuffer[ OutputBufferLength + 1 ] = Length; // // Update the global variables for the next time around // OutputTokenCount += 1; OutputBufferLength += 2; // // And return to our caller // return; } int FindMatch ( int CurrentInputBufferIndex, int *Offset ) /*++ Routine Description: This routine uses the hash table to find a potential match. The routine knows to search the input buffer Arguments: CurrentInputBufferIndex - Supplies the index within the input buffer of the string we're trying to match Offset - Receives the offset from the CurrentInputBufferIndex back to the place where the match was found Return Value: int - Returns the number of bytes that actually matched --*/ { int PreviousInputBufferIndex; int Length; // // Lookup the hash table entry // PreviousInputBufferIndex = LookupTableEntry( CurrentInputBufferIndex, FALSE ); // // Use the hash table entry to compare the current input location with the // saved location // Length = CompareStrings( CurrentInputBufferIndex, PreviousInputBufferIndex ); // // The offset we compute is the difference between the two indices // *Offset = CurrentInputBufferIndex - PreviousInputBufferIndex; // // And return the match length to our caller // return Length; } int CompareStrings ( int InputBufferIndex1, int InputBufferIndex2 ) /*++ Routine Description: This routine compares two strings in the input buffer. It knows to stop when it hits the end of the input buffer Arguments: InputBufferIndex1 - Supplies the index to the first string InputBufferIndex2 - Supplies the index to the second string Return Value: int - Returns the number of characters that match between the two strings --*/ { int Length; Length = 0; // // Loop through incrementing length each time until we're exhausted // the string or we don't match // while ((InputBufferIndex1 + Length < InputBufferLength) && (InputBufferIndex2 + Length < InputBufferLength) && (InputBuffer[InputBufferIndex1 + Length] == InputBuffer[InputBufferIndex2 + Length])) { Length += 1; } // // And return to our caller // return Length; } int LookupTableEntry ( int InputBufferIndex, char UpdateTableEntry ) /*++ Routine Description: This routine looks up an entry in the hash table and optionally replaces the entry Arguments: InputBufferIndex - Supplies the input buffer index for the key we're looking up UpdateTableEntry - Indictes if we are to update the table entry after the lookup is performed Return Value: int - Returns the value stored in the hash table --*/ { int HashIndex; int Entry; // // Generate a hash index from the 3 input buffer characters // HashIndex = InputBuffer[ InputBufferIndex ]; HashIndex = ((HashIndex << 8) | (HashIndex >> 4)); HashIndex = ((HashIndex) ^ (InputBuffer[ InputBufferIndex + 1 ]) ^ (InputBuffer[ InputBufferIndex + 2 ] << 4)) & 0xff; // // Capture the hash table entry // Entry = HashTable[HashIndex]; // // If we are to update the entry do so now // if (UpdateTableEntry) { HashTable[ HashIndex ] = InputBufferIndex; } // // And return to our caller // return Entry; }